library(dataRetrieval) #read in necessary libraries
library(plotly)
library(dplyr)
library(MASS)
library(knitr)
library(boot)
library(ggplot2)
library(data.table)
library(formattable)
library(tidyr)
TSS_Caz <- read.csv('~/AWQP/TSS.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS) #read in data for each persons TSS Standards
mean_Caz = mean(TSS_Caz$TSS..mg.L.)
 RMSE_Caz <-  sqrt(mean((TSS_Caz$TSS - TSS_Caz$TSS..mg.L.)^2)) #find RSME
sd_Caz <- sd(TSS_Caz$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Caz <- sd(TSS_Caz$TSS..mg.L.) / mean(TSS_Caz$TSS..mg.L.) * 100 #find cv

TSS_Justin <- read.csv('~/AWQP/TSS_Justin.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS)
mean_Justin = mean(TSS_Justin$TSS..mg.L.)
 RMSE_Justin <-  sqrt(mean((TSS_Justin$TSS - TSS_Justin$TSS..mg.L.)^2)) #find RSME
sd_Justin <- sd(TSS_Justin$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Justin <- sd(TSS_Justin$TSS..mg.L.) / mean(TSS_Justin$TSS..mg.L.) * 100 #find cv

TSS_Melina <- read.csv('~/AWQP/TSS_Melina.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS)
mean_Mel = mean(TSS_Melina$TSS..mg.L.)
 RMSE_Mel <-  sqrt(mean((TSS_Melina$TSS - TSS_Melina$TSS..mg.L.)^2)) #find RSME
sd_Mel <- sd(TSS_Melina$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mel <- sd(TSS_Melina$TSS..mg.L.) / mean(TSS_Melina$TSS..mg.L.) * 100

TSS_Mia <- read.csv('~/AWQP/TSS_Mia_.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS) %>%
  na.omit()
 mean_Mia = mean(TSS_Mia$TSS..mg.L.)
 RMSE_Mia <-  sqrt(mean((TSS_Mia$TSS - TSS_Mia$TSS..mg.L.)^2)) #find RSME
sd_Mia <- sd(TSS_Mia$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mia <- sd(TSS_Mia$TSS..mg.L.) / mean(TSS_Mia$TSS..mg.L.) * 100 

TSS_Morgan <- read.csv('~/AWQP/Morgan_TSS.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS) %>%
  na.omit()
 mean_Morgan = mean(TSS_Morgan$TSS..mg.L.)
 RMSE_Morgan <-  sqrt(mean((TSS_Morgan$TSS - TSS_Morgan$TSS..mg.L.)^2)) #find RSME
sd_Morgan <- sd(TSS_Morgan$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Morgan <- sd(TSS_Morgan$TSS..mg.L.) / mean(TSS_Morgan$TSS..mg.L.) * 100 

TSS_bind1 <- rbind(TSS_Caz, TSS_Justin, TSS_Melina, TSS_Mia, TSS_Morgan) %>% #combine individual data sets
  na.omit() %>% 
  dplyr::select(TSS..mg.L., Name)
TSS <- TSS_bind1 %>%
 group_by(Name) %>%
  mutate(row = row_number()) %>%
  tidyr::pivot_wider(names_from = Name, values_from = TSS..mg.L.) %>%
   dplyr::select(-row)
tab_stats <- matrix(c(mean_Caz,sd_Caz, RMSE_Caz, cv_Caz, mean_Justin, sd_Justin, RMSE_Justin, cv_Justin, mean_Mel, sd_Mel, RMSE_Mel, cv_Mel, mean_Mia,sd_Mia, RMSE_Mia, cv_Mia, mean_Morgan,sd_Morgan, RMSE_Morgan, cv_Morgan), ncol=4, byrow=TRUE) #create matrix for table
colnames(tab_stats) <- c('Mean(ppm)','Standard Deviation(ppm)','Root Mean Square Error(ppm)', 'Coefficient of Variation(%)') #label columns
rownames(tab_stats) <- c('Caz','Justin','Melina', 'Mia', 'Morgan') #label rows
table_stats <- as.table(tab_stats) 
  
formattable(table_stats) %>%
  kable(caption = "Summary Statistics") #format table
Summary Statistics
Mean(ppm) Standard Deviation(ppm) Root Mean Square Error(ppm) Coefficient of Variation(%)
Caz 57.27273 3.595733 8.040303 6.278263
Justin 65.33333 8.344437 17.256239 12.772098
Melina 56.29630 5.386311 8.089011 9.567789
Mia 60.37037 7.895928 12.765695 13.079145
Morgan 43.03030 6.403913 9.265991 14.882333
fig <- plot_ly(TSS_bind1, x = ~Name, y = ~TSS..mg.L., color = ~Name, colors =  c("red", "blue", "purple", 'green4', 'orange2')) #select values for plotly, assign colors
fig <- fig %>% add_markers(marker = list(line = list(color = ~Name, width = 1))) 
fig <- fig %>% layout(
    title = "TSS Standards", #title
    xaxis = list(domain = c(0.1, 1)),
    yaxis = list(title = "TSS (mg/L)"),
    updatemenus = list(
      list(
        y = 0.8,
        buttons = list(

          list(method = "restyle", #set up box plot
               args = list("type", "box"),
               label = "Box plot" 
              ),

          list(method = "restyle", #set up violin plot
               args = list("type", "violin"),
               label = "Violin"
               )))
    ))

fig
cols <- c("red", "blue", "purple", 'green4', 'orange2')  #attach colors for each person
# Basic density plot in ggplot2
ggplot( TSS_bind1, aes(x = TSS..mg.L., colour = Name)) + #sort colors for names
  geom_density(lwd = 1.2, linetype = 1) + 
  scale_color_manual(values = cols) +  #use previous set colors
  labs(x = 'TSS mg/L', y = 'Density',
       title = 'TSS Kernel Density Estimation') #label the graph

mean.function <- function(x, index) {
  d <- x[index]     # This first line will go in ever bootstrap function you make.
  return(mean(d))  
}
set.seed(50)
BootDist_C <- boot(data = TSS_Caz$TSS..mg.L., mean.function, R=10000)
low_C <- quantile( BootDist_C$t, probs=(.025))
upper_C <- quantile( BootDist_C$t, probs= (.975))
TSS_Morgan <- read.csv('~/AWQP/Morgan_TSS.csv') %>% 
  dplyr::select(TSS..mg.L., Name, TSS) %>%
  na.omit()
 mean_Morgan = mean(TSS_Morgan$TSS..mg.L.)
 RMSE_Morgan <-  sqrt(mean((TSS_Morgan$TSS - TSS_Morgan$TSS..mg.L.)^2)) #find RSME
sd_Morgan <- sd(TSS_Morgan$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Morgan <- sd(TSS_Morgan$TSS..mg.L.) / mean(TSS_Morgan$TSS..mg.L.) * 100 

BootDist_Morgan <- boot(data = TSS_Morgan$TSS..mg.L., statistic = mean.function, R=10000)
set.seed(50)
low_Morg <- quantile( BootDist_Morgan$t, probs=(.025))
upper_Morg <- quantile( BootDist_Morgan$t, probs= (.975))
mean.function <- function(x, index) {
  d <- x[index]     # This first line will go in ever bootstrap function you make.
  return(mean(d))  
}
set.seed(50)
BootDist_C1 <- boot(data = TSS_Caz$TSS..mg.L., mean.function, R=10000)
low_C1 <- quantile( BootDist_C$t, probs=(.005))
upper_C1 <- quantile( BootDist_C$t, probs= (.995))
BootDist_Morgan1 <- boot(data = TSS_Morgan$TSS..mg.L., statistic = mean.function, R=10000)
set.seed(50)
low_Morg1 <- quantile( BootDist_Morgan$t, probs=(.005))
upper_Morg1 <- quantile( BootDist_Morgan$t, probs= (.995))
Confidence Intervals
Mean (ppm) Lower CI (95%) Upper CI (95%) Lower CI (90%) Upper CI (90%)
Caz 57.27273 55.15152 59.39394 54.84848 60.00000
Justin 65.33333 60.66667 70.33333 59.33333 71.66667
Melina 56.29630 53.33333 59.62963 52.22222 60.74074
Mia 60.37037 55.55556 65.18519 54.07407 66.66667
Morgan 43.03030 39.69697 46.66667 38.78788 48.18182

```